/*
 * Header file for all AVC INTER prediction kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * This file was originally licensed under the following license
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
#if !defined(__INTER_HEADER__)	// Make sure this file is only included once
#define __INTER_HEADER__

// Module name: inter_header.inc
//
// Header file for all AVC INTER prediction kernels
//
// Contents: register aliases (#define), typed register views (.declare),
// offset / mask / message-descriptor constants, and the CALL_INTER /
// RETURN_INTER sub-routine macros.  The include guard opened above is
// closed by the #endif at the end of the file.
//

// Defined with no value, so it can only be tested with #ifdef / defined().
// NOTE(review): presumably checked by shared include files to detect an
// INTER kernel build - confirm against the files that test it.
#define INTER_KERNEL

//-------------------------------------------------------------------------------------------
// TODO: The definitions below are to be merged with the definitions above later
//-------------------------------------------------------------------------------------------


//------------ Input parameters & bit masks

// Software workaround (SW WA) for weighted prediction - 2007/09/06
// Earlier declarations, kept disabled for reference:
//   .declare guwR1   Base=r1     ElementSize=2 Type=uw
//   .declare guwW128 Base=r63.13 ElementSize=2 Type=uw

// SW_W_128 enables the SW WA for the special Weight=128 case.
// It is defined here for every target except DEV_ILK.
#ifndef DEV_ILK
#define SW_W_128
#endif	// !DEV_ILK

// The dword view gudW128 exists only while the workaround is enabled.
#ifdef SW_W_128
.declare gudW128 Base=r1.0 ElementSize=4 Type=ud
#endif	// SW_W_128

// X / Y origin
#define gORIX               r3.4        // :ub
#define gORIY               r3.5        // :ub

// CBP byte, laid out as (0, 0, Y0, Y1, Y2, Y3, Cb, Cr)
#define gCBP                r3.9        // :ub
#define nCBPY_MASK          0x3c        // Y0..Y3
#define nCBPU_MASK          0x2         // Cb
#define nCBPV_MASK          0x1         // Cr

// Used to compute the message descriptor for a write
#define gFIELDFLAGS         r3.1        // :uw

// MB type and field MB flag share one byte and are separated by their masks
#define gMBTYPE             r3.1        // :ub, MB type
#define nMBTYPE_MASK        0x1f
#define gFIELDMBFLAG        r3.1        // :ub, field MB flag
#define nFIELDMB_MASK       0x40
// NOTE(review): gFIELDFLAGS is r3.1 accessed as :uw, whereas gMBTYPE and
// gFIELDMBFLAG are r3.1 accessed as :ub.  If sub-register numbers count
// elements of the access type (as gubBIDX at r3.16 with
// nOFFSET_BIDX = 32*3+4*4 suggests), the word is bytes 2-3 and the byte is
// byte 1, so the word would hold gMBPARITY but not gFIELDMBFLAG - confirm
// against the inline-data layout.

#define gMBPARITY           r3.3        // :ub, bottom field flag
#define nMBPARITY_MASK      0x01

#define gWPREDFLAG          r3.0        // :ub, weighted pred flag
#define nWBIDIR_MASK        0xc0

// Sub-MB shape and prediction mode: two byte aliases plus one word view.
// NOTE(review): word 6 of r3 would be bytes 12-13, i.e. shape and mode
// together - confirm.
#define gSUBMB_SHAPE        r3.12       // :ub, sub-MB shape
#define gSUBMB_MODE         r3.13       // :ub, sub-MB prediction mode
.declare guwSUBMB_SHAPE_MODE Base=r3.6 ElementSize=2 Type=uw

// log2 weight denominators
#define gYWDENOM            r3.14       // :ub, luma
#define gCWDENOM            r3.15       // :ub, chroma

// Register addresses of error data / MV
#define gADDR               r3.24       // :ub

// Unsigned-byte view starting at byte 16 of r3
.declare gubBIDX Base=r3.16 ElementSize=1 Type=ub

// Weights/offsets start at r8; gdWGT and gwWGT are signed dword and signed
// word views of the same base register.
#define		gWGT			r8					// Weights/offsets
.declare    gdWGT			Base=r8  ElementSize=4 Type=d
.declare    gwWGT			Base=r8  ElementSize=2 Type=w
// Motion vectors start at r4; gwMV and gdMV are signed word and signed dword
// views of the same base register.
#define		gMV				r4					// MVs
.declare    gwMV			Base=r4  ElementSize=2 Type=w
.declare    gdMV			Base=r4  ElementSize=4 Type=d

// Error data: Y at r10 (16 GRFs) and C at r26 (8 GRFs), each with a signed
// word view and an unsigned byte view.  Every byte view uses ElementSize=1 so
// that the element size agrees with Type=ub.
.declare	gwERRORY		Base=r10 ElementSize=2 Type=w		// 16 GRFs
.declare	gubERRORY		Base=r10 ElementSize=1 Type=ub
.declare	gwERRORC		Base=r26 ElementSize=2 Type=w		// 8 GRFs
.declare	gubERRORC		Base=r26 ElementSize=1 Type=ub		// was ElementSize=2, which contradicted Type=ub

//------------ Address registers
// Every name below aliases a sub-register of a0.  Several names share the
// same sub-register.  Entries marked :ud are dword-indexed, so a0.N:ud
// overlays the word pair a0.(2N) / a0.(2N+1).

// Message descriptor / reference pointer
#define pMSGDSC         a0.0        // :ud, must be the leading dword of the address register
#define pREF            a0.0

// BIDX and WGT pointers, with the dword that covers both
#define pBIDX           a0.2
#define pWGT            a0.3
#define pWGT_BIDX       a0.1        // :ud, WGT & BIDX

// Error-data pointers, with the dword that covers both
#define pERRORY         a0.4
#define pERRORC         a0.5
#define pERRORYC        a0.2        // :ud

// MV pointer and the RECON & MV dword
#define pMV             a0.6
#define pRECON_MV       a0.3        // :ud, RECON & MV

// Reference pointers 0..7 and their dword views
#define pREF0           a0.0        // :uw
#define pREF1           a0.1
#define pREF2           a0.2
#define pREF3           a0.3
#define pREF4           a0.4
#define pREF5           a0.5
#define pREF6           a0.6
#define pREF7           a0.7
#define pREF0D          a0.0        // :ud
#define pREF2D          a0.1        // :ud
#define pREF4D          a0.2        // :ud
#define pREF6D          a0.3        // :ud

// Result pointers
#define pRES            a0.6
#define pRESULT         a0.7
#define pRESD           a0.3        // :ud

// Generic names for the first two sub-registers
#define p0              a0.0
#define p1              a0.1

//------------ Constants for static/inline/indirect

// Single offsets
#define nOFFSET_BIDX        112             // = 32*3 + 4*4
#define nOFFSET_MV          128             // = 32*4
#define nOFFSET_WGT         256             // = 32*8
#define nOFFSET_ERRORY      0x0140          // = 320 = 32*10
#define nOFFSET_ERRORC      0x0340          // = 832 = 32*26

// Two 16-bit offsets packed into one dword as (high << 16) + low
#define nOFFSET_WGT_BIDX    0x01000070      // = (256 << 16) + 112
#define nOFFSET_ERROR       0x03400140      // = ((320 + 128*4) << 16) + 320
#define nOFFSET_RECON_MV    0x04400080      // = (1088 << 16) + 128;  TODO: OFFSET_RECON is obsolete

//------------ Constants for kernel internal variables
// Each 16-bit offset is 32 * N, optionally plus a byte offset.
// Dword constants pack two such offsets as (high << 16) | low.

// Interpolation buffers
#define nOFFSET_INTPY0          0x0640          // = 32*50
#define nOFFSET_INTPY1          0x0780          // = 32*60
#define nOFFSET_INTPC0          0x06c0          // = 32*54
#define nOFFSET_INTPC1          0x0480          // = 32*36
#define nOFFSET_INTP0           0x06c00640      // = (nOFFSET_INTPC0 << 16) | nOFFSET_INTPY0
#define nOFFSET_INTP1           0x04800780      // = (nOFFSET_INTPC1 << 16) | nOFFSET_INTPY1

// Interim buffer
#define nOFFSET_INTERIM         0x0480          // = 32*36
#define nOFFSET_INTERIM2        0x04A00480      // = ((32*37) << 16) | (32*36)
#define nOFFSET_INTERIM3        0x04A00480      // = ((32*36+32) << 16) | (32*36)
#define nOFFSET_INTERIM4        0x04A00490      // = ((32*37) << 16) | (32*36+16)

// Interim buffer for 4x4
#define nOFFSET_INTERIM4x4      0x04C0          // = 32*38
#define nOFFSET_INTERIM4x4_4    0x04E004D0      // = ((32*38+32) << 16) | (32*38+16)
#define nOFFSET_INTERIM4x4_5    0x04D004C0      // = ((32*38+16) << 16) | (32*38)
#define nOFFSET_INTERIM4x4_6    0x04E004C0      // = ((32*38+32) << 16) | (32*38)
#define nOFFSET_INTERIM4x4_7    0x04D004C8      // = ((32*38+16) << 16) | (32*38+8)
#define nOFFSET_INTERIM4x4_8    0x04E004D8      // = ((32*38+32) << 16) | (32*38+24)
#define nOFFSET_INTERIM4x4_9    0x04F004E8      // = ((32*38+48) << 16) | (32*38+40)

// Result and reference buffers
#define nOFFSET_RES             0x540           // = 32*42
#define nOFFSET_REF             0x560           // = 32*43
#define nOFFSET_REFC            0x700           // = 32*56

			// Binding table indices
#define nBDIX_DESTY         0
#define nBDIX_DESTC         1
#define nBI_LC_DIFF         0x10        // Binding table index difference between luma and chroma

// NOTE(review): the names suggest GRF width and GRF half width in bytes - confirm.
#define nGRFWIB             32
#define nGRFHWIB            16

//------------ Regions
// Typed views over GRF ranges.  Several views share a base register, so
// they overlay the same storage.

// Reference data at r43 and r56
.declare gudREF         Base=r43 ElementSize=4 SrcRegion=<16;16,1> Type=ud
.declare gubREF         Base=r43 ElementSize=1 Type=ub
.declare gudREFC        Base=r56 ElementSize=4 SrcRegion=<16;16,1> Type=ud

// 16x16 handling
.declare gudREF21x21    Base=r58 ElementSize=4 SrcRegion=<16;16,1> Type=ud
.declare gudREF18x10    Base=r66 ElementSize=4 SrcRegion=<16;16,1> Type=ud
.declare gubREF18x10    Base=r66 ElementSize=1 SrcRegion=<16;16,1> Type=ub



// Dword and byte views at r38 (8 GRFs) and r46 (4 GRFs)
.declare gudREF16x16    Base=r38 ElementSize=4 Type=ud      // 8 GRFs
.declare gubREF16x16    Base=r38 ElementSize=1 Type=ub
.declare gudREFC16x8    Base=r46 ElementSize=4 Type=ud      // 4 GRFs
.declare gubREFC16x8    Base=r46 ElementSize=1 Type=ub

// TODO (left open by the original author)
.declare gubAVG         Base=r56 ElementSize=1 Type=ub
.declare gubREFY_BWD    Base=r64 ElementSize=1 Type=ub
.declare gubREFC_BWD    Base=r72 ElementSize=1 Type=ub


// r50 / r60: INTPY0 and INTPY1 as word, dword and strided-byte views.
// YPRED overlays INTPY0 at r50.
.declare guwINTPY0          Base=r50 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gudINTPY0          Base=r50 ElementSize=4 Type=ud
.declare gubINTPY0          Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare guwINTPY1          Base=r60 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gudINTPY1          Base=r60 ElementSize=4 Type=ud
.declare gubINTPY1          Base=r60 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare guwYPRED           Base=r50 ElementSize=2 SrcRegion=<8;8,1> Type=uw
.declare gubYPRED           Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub

// r54 / r36: INTPC0 and INTPC1.  CPRED overlays INTPC0 at r54.
.declare guwINTPC0          Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gwINTPC0           Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=w
.declare gudINTPC0          Base=r54 ElementSize=4 Type=ud
.declare gubINTPC0          Base=r54 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare guwINTPC1          Base=r36 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gudINTPC1          Base=r36 ElementSize=4 Type=ud
.declare gubINTPC1          Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare guwCPRED           Base=r54 ElementSize=2 SrcRegion=<16;8,2> Type=uw
.declare gubCPRED           Base=r54 ElementSize=1 SrcRegion=<32;8,4> Type=ub

// Interim buffers: register aliases followed by their typed views
#define gINTERIM            r36
.declare gubINTERIM_BUF     Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub
#define gINTERIM4x4         r38
.declare gubINTERIM4x4_BUF  Base=r38 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare gwINTERIM4x4_BUF   Base=r38 ElementSize=2 Type=w

// Second interim buffer at r42
.declare gubINTERIM_BUF2    Base=r42 ElementSize=1 SrcRegion=<8;4,2> Type=ub
.declare gwINTERIM_BUF2     Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w
.declare guwINTERIM_BUF2    Base=r42 ElementSize=2 Type=uw

// Third interim buffer at r38 (2 GRFs)
.declare gwINTERIM_BUF3     Base=r38 ElementSize=2 SrcRegion=<16;16,1> Type=w      // 2 GRFs
.declare gubINTERIM_BUF3    Base=r38 ElementSize=1 Type=ub

// Word view at r42, the same base as the *_BUF2 views
.declare gwTEMP             Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w

//------------ General registers

#define gX                  r3.2        // w
#define gY                  r3.3        // w

#define gMSGDSC_R           r3.6        // ud
#define gMSGDSC_W           r3.7        // ud

// Placement of the MB-type word and of the loop registers depends on whether
// the SW_W_128 workaround is enabled.  In both configurations gwMBTYPE is
// shared with gLOOP_SUBMB.
// NOTE(review): gLOOP_DIR is r9.6 with SW_W_128 but r8.7 without it, and
// r8.7 is gLOOP_SUBMBPT in the SW_W_128 configuration - confirm the
// difference is intended.
// TODO (left open by the original author)
#ifdef SW_W_128
.declare gwMBTYPE Base=r8.6 ElementSize=2 Type=w       // Shared with gLOOP_SUBMB
#define gLOOP_SUBMB         r8.6
#define gLOOP_SUBMBPT       r8.7
#define gLOOP_DIR           r9.6
#else
.declare gwMBTYPE Base=r1.0 ElementSize=2 Type=w       // Shared with gLOOP_SUBMB
#define gLOOP_SUBMB         r1.0
#define gLOOP_SUBMBPT       r1.1
#define gLOOP_DIR           r8.7
#endif	// SW_W_128

// Loop counter for submodules; same register in both configurations
#define gLOOPCNT            r9.7

// r34 aliases, listed in ascending sub-register order.  Type notes are the
// ones given by the original author.
#define gMVX_INT            r34.0       // :w
#define gMVY_INT            r34.1       // :w
#define gMVX_FRAC           r34.2       // :w
#define gMVY_FRAC           r34.3       // :w
#define gMVX_FRACC          r34.4       // :w
#define gMVY_FRACC          r34.5       // :w
#define gW0                 r34.6       // Temporary WORD
#define gW1                 r34.7       // Temporary WORD
#define gW2                 r34.8       // Temporary WORD
#define gW3                 r34.9       // Temporary WORD
#define gpINTPY             r34.10
#define gpINTPC             r34.11
#define gPREDFLAG           r34.12
#define gBIDX               r34.13
#define gREFPARITY          r34.14
#define gW4                 r34.15

// Dword-typed r34 aliases.
// NOTE(review): if dword-indexed, r34.3 overlays gW0/gW1 and r34.5 overlays
// gpINTPY/gpINTPC - confirm.
#define gD0                 r34.3       // Temporary DWORD
#define gpINTP              r34.5       // DW

// r1 aliases
#define gCBP_MASK           r1.1
#define gpADDR              r1.2        // :uw (8 words)
#define gMVSTEP             r1.13
#define gCHRMVADJ           r1.14
#define gPARITY             r1.15

// r8 alias
#define gSHAPETEMP          r8.15       // :uw

// Coefficients A..D in r42
#define gCOEFA              r42.0
#define gCOEFB              r42.1
#define gCOEFC              r42.2
#define gCOEFD              r42.3

// Weighted prediction

// r46: prediction flags
#define gPREDFLAG0          r46.0
#define gPREDFLAG1          r46.2

// r43: flags, add and shift values
#define gWEIGHTFLAG         r43.2
#define gBIPRED             r43.3
#define gYADD               r43.4
#define gCADD               r43.5
#define gYSHIFT             r43.6
#define gCSHIFT             r43.7

// r44: offsets
#define gOFFSET             r44.0
#define gUOFFSET            r44.1
#define gVOFFSET            r44.2

// r45: sub-registers 0..11 alternate between a W* name and an O* name
#define gWT0                r45.0
#define gO0                 r45.1
#define gWT1                r45.2
#define gO1                 r45.3
#define gUW0                r45.4
#define gUO0                r45.5
#define gUW1                r45.6
#define gUO1                r45.7
#define gVW0                r45.8
#define gVO0                r45.9
#define gVW1                r45.10
#define gVO1                r45.11

// NOTE(review): the _D suffix suggests dword-indexed views, in which case
// r45.0 would start at gWT0 and r45.2 at gUW0 - confirm.
#define gWT0_D              r45.0
#define gUW0_D              r45.2

//------------ Message-related Registers & constants

// Message source
#define gMSGSRC             r2

// Message header registers.  Several names map to the same m register.
#define mMSGHDR             m1
#define mMSGHDRY            m1
#define mMSGHDRC            m2

#define mMSGHDR1            m1
#define mMSGHDR2            m2
#define mMSGHDR3            m3
#define mMSGHDR4            m4

#define mMSGHDRYW           m1
#define mMSGHDRCW           m10

// DWORD block read / write message descriptors, selected by target.
// NOTE(review): the _TF / _BF suffixes presumably mean top field / bottom
// field - confirm.
#ifndef DEV_ILK	// Pre DEV_ILK

// Read through Data Port, Sampler Cache:
// 0000 0100(read)  0001(msg len) xxxx(resp len) 1010 (sampler cache) xxxx (field/frame) xxxx xxxx (bidx)
#define nDWBRMSGDSC_SC      0x0410A002
#define nDWBRMSGDSC_SC_TF   0x0410A602
#define nDWBRMSGDSC_SC_BF   0x0410A702

// Write through Data Port, Render Cache:
// 0000 0101(write) 0001(msg len) xxxx(resp len) 0010 (render cache)  xxxx (field/frame) xxxx xxxx (bidx)
#define nDWBWMSGDSC         0x05102000
#define nDWBWMSGDSC_TF      0x05102600
#define nDWBWMSGDSC_BF      0x05102700

#else	// DEV_ILK

// NOTE(review): the original bit-layout annotation for this branch was the
// same text as the pre-DEV_ILK one, but the values below do not follow that
// layout - confirm the encoding against the DEV_ILK documentation.

// Read through Data Port, Sampler Cache
#define nDWBRMSGDSC_SC      0x0208A002
#define nDWBRMSGDSC_SC_TF   0x0208E602
#define nDWBRMSGDSC_SC_BF   0x0208E702

// Write through Data Port, Render Cache
#define nDWBWMSGDSC         0x02082000
#define nDWBWMSGDSC_TF      0x02082600
#define nDWBWMSGDSC_BF      0x02082700

#endif	// !DEV_ILK

// Mask defined alongside the descriptors, identical for both targets
#define nDWB_FIELD_MASK     0x0600
										
// Message data payload: signed-byte views starting at m2 and m11, which are
// the registers right after mMSGHDRYW (m1) and mMSGHDRCW (m10).
// REGION is a macro that is not defined in this file.
.declare mbMSGPAYLOADY Base=m2  ElementSize=1 SrcRegion=REGION(16,1) Type=b
.declare mbMSGPAYLOADC Base=m11 ElementSize=1 SrcRegion=REGION(16,1) Type=b

// Destination registers for write commit
#define gREG_WRITE_COMMIT_Y     r10.0
#define gREG_WRITE_COMMIT_UV    r11.0

// Register that holds the return address while a sub-routine runs.  It is
// written by CALL_INTER and read by RETURN_INTER, so a nested CALL_INTER
// overwrites the value saved by the outer one.
#define RETURN_REG_INTER	r1.5		// Return pointer for all sub-routine calls (type DWORD)

// CALL_INTER(subFunc, skipInst) expands to two instructions:
//   1. add  - stores ip plus the constant 1+skipInst*INST_SIZE in RETURN_REG_INTER
//   2. jmpi - jumps to subFunc
// INST_SIZE is not defined in this file and must be provided by another include.
// NOTE(review): the offset has no parentheses, so it evaluates as
// 1 + (skipInst*INST_SIZE), and skipInst itself is substituted unparenthesized;
// confirm this is the intended return address before passing an expression.
// NOTE(review): the literal "\n" before the line continuation presumably
// becomes a real newline in a later build step so that the two instructions
// end up on separate lines - confirm against the build scripts.
#define CALL_INTER(subFunc, skipInst)	add (1) RETURN_REG_INTER<1>:ud   ip:ud	1+skipInst*INST_SIZE \n\
				jmpi (1) subFunc
// RETURN_INTER copies RETURN_REG_INTER back into ip.
#define	RETURN_INTER		mov (1)	ip:ud	RETURN_REG_INTER<0;1,0>:ud		// Return to calling module


// End of inter_header.inc

// Closes the include guard opened at the top of the file.
#endif	// !defined(__INTER_HEADER__)

